── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)#usar filterlibrary(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(spdep)
Loading required package: spData
To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`
library(jsonlite)
Attaching package: 'jsonlite'
The following object is masked from 'package:purrr':
flatten
Loading required package: nlme
Attaching package: 'nlme'
The following object is masked from 'package:dplyr':
collapse
This is mgcv 1.9-3. For overview type 'help("mgcv-package")'.
library(car)
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:purrr':
some
The following object is masked from 'package:dplyr':
recode
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
######################### Warning from 'xts' package ##########################
# #
# The dplyr lag() function breaks how base R's lag() function is supposed to #
# work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
# source() into this session won't work correctly. #
# #
# Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
# conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
# dplyr from breaking base R's lag() function. #
# #
# Code in packages is not affected. It's protected by R's namespace mechanism #
# Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
# #
###############################################################################
Attaching package: 'xts'
The following object is masked from 'package:leaflet':
addLegend
The following objects are masked from 'package:dplyr':
first, last
library(vtable)
Loading required package: kableExtra
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
library(glmmTMB)
Datos y Limpieza
qqnorm pero qqbeta. x cuantiles de la beta de un continente y en y cuntiles de la muestra y en otro color los cuantiles de la normal
A continuación, se procede a la carga de las bases de datos correspondientes al continente africano. Se han utilizado dos data frames separados, ya que durante el proceso de descarga algunos países no fueron seleccionados inicialmente. Por este motivo, se optó por mantener ambas bases de datos por separado para conservar la coherencia y facilitar su posterior manejo.
IDHAFRICA1<-read_xlsx("IDHAFRICA.xlsx")IDHAFRICAFALTANTES1<-read_xlsx("AFRICAF.xlsx")# Añadir la columna "CONTINENTE"IDHAFRICA1$CONTINENTE <-"AFRICA"IDHAFRICAFALTANTES1$CONTINENTE <-"AFRICA"
Rows: 182 Columns: 31
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): Country / Territory, ISO3, Region
dbl (28): 2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
years_to_normalize <-as.character(1995:2011)# Multiplicar por 10 los valores de esos añosCPI[years_to_normalize] <- CPI[years_to_normalize] *10CPI <-CPI [,-3]
IDHAFRICA<- IDHAFRICA %>%pivot_wider(names_from = indicator, # Los valores de la columna 'indicator' serán las nuevas columnasvalues_from = value # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas )# Ver el resultadohead(IDHAFRICA)
# A tibble: 6 × 10
countryIsoCode country year CONTINENTE `Expected Years of Schooling (years)`
<chr> <chr> <chr> <chr> <dbl>
1 AGO Angola 2000 AFRICA 4.82
2 AGO Angola 2001 AFRICA 5.25
3 AGO Angola 2002 AFRICA 5.68
4 AGO Angola 2003 AFRICA 6.10
5 AGO Angola 2004 AFRICA 6.53
6 AGO Angola 2005 AFRICA 6.96
# ℹ 5 more variables: `Gross National Income Per Capita (2017 PPP$)` <dbl>,
# `Human Development Index (value)` <dbl>, `HDI Rank` <dbl>,
# `Life Expectancy at Birth (years)` <dbl>,
# `Mean Years of Schooling (years)` <dbl>
#sigo sin juntarlo porque ocurren problemasIDHAFRICAFALTANTES<- IDHAFRICAFALTANTES%>%pivot_wider(names_from = indicator, # Los valores de la columna 'indicator' serán las nuevas columnasvalues_from = value # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas )# Ver el resultadohead(IDHAFRICAFALTANTES)
# A tibble: 6 × 10
countryIsoCode country year CONTINENTE Expected Years of Schooling (y…¹
<chr> <chr> <chr> <chr> <dbl>
1 CIV Côte d'Ivoire 2000 AFRICA 6.49
2 CIV Côte d'Ivoire 2001 AFRICA 6.62
3 CIV Côte d'Ivoire 2002 AFRICA 6.74
4 CIV Côte d'Ivoire 2003 AFRICA 6.87
5 CIV Côte d'Ivoire 2004 AFRICA 7.00
6 CIV Côte d'Ivoire 2005 AFRICA 7.13
# ℹ abbreviated name: ¹`Expected Years of Schooling (years)`
# ℹ 5 more variables: `Gross National Income Per Capita (2017 PPP$)` <dbl>,
# `Human Development Index (value)` <dbl>, `HDI Rank` <dbl>,
# `Life Expectancy at Birth (years)` <dbl>,
# `Mean Years of Schooling (years)` <dbl>
# Guardar el resultado en un nuevo archivowrite.csv(IDHAFRICAFALTANTES, "IDHAFRICAFALTANTES_pivoted.csv", row.names =FALSE)
IDHEUROPA<- IDHEUROPA %>%pivot_wider(names_from = indicator, # Los valores de la columna 'indicator' serán las nuevas columnasvalues_from = value # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas )# Ver el resultadohead(IDHEUROPA)
# A tibble: 6 × 10
countryIsoCode country year CONTINENTE `Expected Years of Schooling (years)`
<chr> <chr> <chr> <chr> <dbl>
1 ALB Albania 2000 EUROPA 10.7
2 ALB Albania 2001 EUROPA 10.9
3 ALB Albania 2002 EUROPA 11.0
4 ALB Albania 2003 EUROPA 11.4
5 ALB Albania 2004 EUROPA 11.5
6 ALB Albania 2005 EUROPA 12.2
# ℹ 5 more variables: `Gross National Income Per Capita (2017 PPP$)` <dbl>,
# `Human Development Index (value)` <dbl>, `HDI Rank` <dbl>,
# `Life Expectancy at Birth (years)` <dbl>,
# `Mean Years of Schooling (years)` <dbl>
CARACTEUROPA<- CARACTEUROPA %>%pivot_wider(names_from = indicator, # Los valores de la columna 'indicator' serán las nuevas columnasvalues_from = value # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas )# Ver el resultadohead(CARACTEUROPA)
# A tibble: 6 × 16
countryIsoCode country year CONTINENTE Carbon dioxide emissions per capita …¹
<chr> <chr> <chr> <chr> <dbl>
1 ALB Albania 1990 EUROPA 1.68
2 ALB Albania 1991 EUROPA 1.30
3 ALB Albania 1992 EUROPA 0.762
4 ALB Albania 1993 EUROPA 0.708
5 ALB Albania 1994 EUROPA 0.584
6 ALB Albania 1995 EUROPA 0.636
# ℹ abbreviated name:
# ¹`Carbon dioxide emissions per capita (production) (tonnes)`
# ℹ 11 more variables: `Coefficient of human inequality` <dbl>,
# `Gender Inequality Index (value)` <dbl>, `Inequality in eduation` <dbl>,
# `Inequality in income` <dbl>, `Inequality in life expectancy` <dbl>,
# `Labour force participation rate, female (% ages 15 and older)` <dbl>,
# `Labour force participation rate, male (% ages 15 and older)` <dbl>, …
CARACTAFRICA<- CARACTAFRICA %>%pivot_wider(names_from = indicator, # Los valores de la columna 'indicator' serán las nuevas columnasvalues_from = value # Los valores de la columna 'value' serán los que se asignen a las nuevas columnas )# Ver el resultadohead(CARACTAFRICA)
# A tibble: 6 × 16
countryIsoCode country year CONTINENTE Carbon dioxide emissions per capita …¹
<chr> <chr> <chr> <chr> <dbl>
1 AGO Angola 1990 AFRICA 0.43
2 AGO Angola 1991 AFRICA 0.414
3 AGO Angola 1992 AFRICA 0.409
4 AGO Angola 1993 AFRICA 0.441
5 AGO Angola 1994 AFRICA 0.843
6 AGO Angola 1995 AFRICA 0.907
# ℹ abbreviated name:
# ¹`Carbon dioxide emissions per capita (production) (tonnes)`
# ℹ 11 more variables: `Coefficient of human inequality` <dbl>,
# `Gender Inequality Index (value)` <dbl>, `Inequality in eduation` <dbl>,
# `Inequality in income` <dbl>, `Inequality in life expectancy` <dbl>,
# `Labour force participation rate, female (% ages 15 and older)` <dbl>,
# `Labour force participation rate, male (% ages 15 and older)` <dbl>, …
unique_isocodes1<-n_distinct(CARACT$countryIsoCode)unique_isocodes1#hay103 paises con lo cual concuerdaa
[1] 103
#Cambiamos los nombres columnas colnames(IDH)[7] <-"HDI"colnames(IDH)[1] <-"ISO"colnames(IDH)[5] <-"EYS"#Expected years of schoolingcolnames(IDH)[6] <-"GNP"#GROS NATIONAL INCOMEcolnames(IDH)[9] <-"LEB"#life expectancy at birthcolnames(IDH)[10] <-"MYS"#mean years of schoolingcolnames(CARACT)[1] <-"ISO"colnames(CARACT)[5] <-"CO2"colnames(CARACT)[6] <-"CHI"#Coefficient of human inequality"colnames(CARACT)[7] <-"GII"#Gender Inequality Indexcolnames(CARACT)[8] <-"IE"#Inequality in eduationcolnames(CARACT)[9] <-"II"#Inequality in incomecolnames(CARACT)[10] <-"ILE"#Inequality in life expectancycolnames(CARACT)[11] <-"LFPF"#Labour force participation rate, female(relacionado conel GII) describe el empowermentcolnames(CARACT)[12] <-"LFPM"#Labour force participation rate, male(relacionado conel GII)colnames(CARACT)[13] <-"MFC"#Material footprint per capita (tonnes)colnames(CARACT)[14] <-"MMR"#Maternal Mortality Ratio (relacionado conel GII)colnames(CARACT)[15] <-"SSPF"#Share of seats in parliament, female(relacionado conel GII)colnames(CARACT)[16] <-"SSPM"#Share of seats in parliament, male (relacionado conel GII)
paisesmapa <-st_read("mapabueno1.geojson")
Reading layer `mapabueno1' from data source
`/Users/belenroderorodriguez/Documents/TFG/datos/mapabueno1.geojson'
using driver `GeoJSON'
Simple feature collection with 256 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -180 ymin: -58.49861 xmax: 180 ymax: 83.6236
Geodetic CRS: WGS 84
#borramos monaco y andorra IDH_filtrado <- IDH[!IDH$ISO %in%c("AND", "MCO"), ]head(IDH_filtrado)
# A tibble: 6 × 10
ISO country year CONTINENTE EYS GNP HDI `HDI Rank` LEB MYS
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AGO Angola 2000 AFRICA 4.82 3913. 0.38 NA 46.0 3.43
2 AGO Angola 2001 AFRICA 5.25 3950. 0.39 NA 46.6 3.46
3 AGO Angola 2002 AFRICA 5.68 4659. 0.406 NA 47.4 3.50
4 AGO Angola 2003 AFRICA 6.10 4682. 0.424 NA 49.6 3.55
5 AGO Angola 2004 AFRICA 6.53 4926. 0.437 NA 50.6 3.59
6 AGO Angola 2005 AFRICA 6.96 5385. 0.451 NA 51.6 3.63
GASTOSALUD <- GASTOSALUD %>%semi_join(IDH, by =c("Country Code"="ISO"))GASTOSALUD <- GASTOSALUD %>%pivot_longer(cols =3:ncol(.), # Desde la 3ra columna hasta el final (los años)names_to ="year", # Nueva columna con los añosvalues_to ="gasto_salud"# Nueva columna con los valores del indicador ) %>%mutate(year =as.numeric(year)) # Convierte los años a numéricosGASTOSALUD <- GASTOSALUD %>%left_join(CARACT %>%select(ISO, CONTINENTE) %>%distinct(), by =c("Country Code"="ISO"))GASTOSALUD <- GASTOSALUD %>%rename(ISO =`Country Code`)CARACT$year <-as.numeric(CARACT$year)GASTOSALUD$year <-as.numeric(GASTOSALUD$year)names(CARACT)
AccesoElectricidad<- AccesoElectricidad %>%semi_join(IDH, by =c("Country Code"="ISO"))AccesoElectricidad <- AccesoElectricidad %>%select(c(-3,-4))AccesoElectricidad <- AccesoElectricidad %>%pivot_longer(cols =3:ncol(.), # Desde la 3ra columna hasta el final (los años)names_to ="year", # Nueva columna con los añosvalues_to ="AccesoElectricidad"# Nueva columna con los valores del indicador ) %>%mutate(year =as.numeric(year)) # Convierte los años a numéricosAccesoElectricidad <- AccesoElectricidad %>%left_join(CARACT %>%select(ISO, CONTINENTE) %>%distinct(), by =c("Country Code"="ISO"))AccesoElectricidad<-AccesoElectricidad %>%rename(ISO =`Country Code`)CARACT$year <-as.numeric(CARACT$year)AccesoElectricidadOyear <-as.numeric(AccesoElectricidad$year)names(CARACT)
# borro de tu base de datos paises que no necesito head(paises_filtrados)
Simple feature collection with 6 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 9.53357 ymin: -18.01639 xmax: 51.67701 ymax: 49.01875
Geodetic CRS: WGS 84
geo_point_2d iso3 status
1 { "lon": 17.544675786366358, "lat": -12.295285224744552 } AGO Member State
2 { "lon": 20.068384605918776, "lat": 41.142284823416894 } ALB Member State
3 { "lon": 44.946823543969643, "lat": 40.286619877420861 } ARM Member State
4 { "lon": 14.14019348879525, "lat": 47.592902606915196 } AUT Member State
5 { "lon": 48.819879230357841, "lat": 40.29691565933846 } AZE Member State
6 { "lon": 29.887145470708866, "lat": -3.3561748083588876 } BDI Member State
color_code name continent region iso_3166_1_alpha_2_codes
1 AGO Angola Africa Middle Africa AO
2 ALB Albania Europe Southern Europe AL
3 ARM Armenia Asia Western Asia AM
4 AUT Austria Europe Western Europe AT
5 AZE Azerbaijan Asia Western Asia AZ
6 BDI Burundi Africa Eastern Africa BI
french_short geometry
1 Angola MULTIPOLYGON (((23.98621 -1...
2 Albanie MULTIPOLYGON (((20.07142 42...
3 Arménie MULTIPOLYGON (((46.54038 38...
4 Autriche MULTIPOLYGON (((16.94618 48...
5 Azerbaïdjan MULTIPOLYGON (((46.17825 38...
6 Burundi MULTIPOLYGON (((30.57333 -2...
AGUAPOTABLE <- AGUAPOTABLE%>%arrange(AGUAPOTABLE[[1]])AGUAPOTABLE <- AGUAPOTABLE %>%semi_join(IDH, by =c("Country Code"="ISO"))AGUAPOTABLE <- AGUAPOTABLE %>%pivot_longer(cols =3:ncol(.), # Desde la 3ra columna hasta el final (los años)names_to ="year", # Nueva columna con los añosvalues_to ="AGUAPOTABLE"# Nueva columna con los valores del indicador ) %>%mutate(year =as.numeric(year)) # Convierte los años a numéricosAGUAPOTABLE <- AGUAPOTABLE %>%left_join(CARACT %>%select(ISO, CONTINENTE) %>%distinct(), by =c("Country Code"="ISO"))AGUAPOTABLE <- AGUAPOTABLE %>%rename(ISO =`Country Code`)CARACT$year <-as.numeric(CARACT$year)AGUAPOTABLE$year <-as.numeric(AGUAPOTABLE$year)names(CARACT)
CARACT <- CARACT %>%left_join(AGUAPOTABLE, by =c("ISO", "year"))
CARACT<- CARACT %>%select(-CONTINENTE.y) %>%# borra la que no quieresrename(CONTINENTE = CONTINENTE.x) # renombra la que te quedas
CARACT <- CARACT %>%#borro columnasselect(-19)
# Ordena los datos por la primera columna (Country Name)CPI <- CPI %>%arrange(CPI[[1]])# Realiza un semi_join con el dataframe IDH usando la columna 'ISO' en CPI y 'ISO' en IDHCPI <- CPI %>%semi_join(IDH, by =c("ISO"="ISO"))# Transformar los datos de CPI a formato largo (long format), de modo que cada año se convierta en una filaCPI <- CPI %>%pivot_longer(cols =3:ncol(.), # Desde la 3ra columna hasta el final (los años de CPI)names_to ="year", # Nueva columna con los añosvalues_to ="CPI"# Nueva columna con los valores del CPI ) %>%mutate(year =as.numeric(year)) # Convierte los años a numéricos# Si tienes un dataframe CARACT con la columna ISO y CONTINENTE, lo unimosCPI <- CPI %>%left_join(CARACT %>%select(ISO, CONTINENTE) %>%distinct(), by =c("ISO"="ISO"))# Renombramos la columna 'Country Code' a 'ISO' si es necesario (en este caso no es necesario)# CPI <- CPI %>%# rename(ISO = `Country Code`) # Solo haz esto si la columna 'ISO' en CPI necesita ser renombrada# Aseguramos que 'year' en CARACT y CPI esté en formato numéricoCARACT$year <-as.numeric(CARACT$year)CPI$year <-as.numeric(CPI$year)
CARACT <- CARACT %>%left_join(CPI, by =c("ISO", "year"))
CARACT<- CARACT %>%select(-CONTINENTE.y) %>%# borra la que no quieresrename(CONTINENTE = CONTINENTE.x) # renombra la que te quedas
CARACT <- CARACT %>%#borro columnasselect(-20)
Para podemos utilzar una distribución beta debemos comprobar que no hay valores 0 o 1.
sum(IDH$HDI ==1)
[1] NA
sum(IDH$HDI ==0)
[1] NA
Analisis Exploratorio
media_por_continente <- IDH %>%group_by(CONTINENTE, year) %>%summarise(media_idh =mean(HDI, na.rm =TRUE), .groups ="drop")# Convertir a dos arrays separados (uno por continente)europa <- media_por_continente %>%filter(CONTINENTE =="Europa") %>%pull(media_idh)africa <- media_por_continente %>%filter(CONTINENTE =="África") %>%pull(media_idh)CARACT <- CARACT %>%filter(!(year >=1990& year <=1999))# Combinar los data frames por la columna común 'País'combined_data <-merge(IDH, CARACT, by =c('country', 'year','CONTINENTE','ISO'))# Crear el gráfico de puntosafrica_data <- combined_data[combined_data$CONTINENTE =="AFRICA", ]europa_data <- combined_data[combined_data$CONTINENTE =="EUROPA", ]
sumtable( africa_subset ,add.median =FALSE, # opcional: si no quieres medianastitle ="", # sin título # salida en consolasumm =c("min(x)","pctile(x)[25]","pctile(x)[50]","mean(x)", "sd(x)","pctile(x)[75]", "max(x)") # puedes personalizar las estadísticas)
Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
Use 'xfun::attr2()' instead.
See help("Deprecated")
Variable
Min
Pctile[25]
Pctile[50]
Mean
Sd
Pctile[75]
Max
HDI
0.26
0.43
0.5
0.52
0.12
0.59
0.81
CO2
0.019
0.14
0.36
1.1
1.9
1
11
GII
0.24
0.52
0.57
0.56
0.098
0.63
0.81
LFPF
6.7
39
55
54
19
70
93
LFPM
39
64
71
71
10
78
99
MFC
0.12
2.8
3.8
4.9
3.2
6.2
26
MMR
3.1
233
443
479
332
627
1687
SSPF
0.01
9.7
15
18
11
24
58
SSPM
42
76
85
82
11
90
100
gasto_salud
0.18
5.5
11
48
84
47
611
AccesoElectricidad
0.8
18
41
44
30
65
100
AGUAPOTABLE
19
51
64
65
18
78
100
CPI
8
22
29
31
11
37
70
sumtable( europa_subset,add.median =FALSE, # opcional: si no quieres medianastitle ="", # sin título # salida en consolasumm =c("min(x)","pctile(x)[25]","pctile(x)[50]","mean(x)", "sd(x)","pctile(x)[75]", "max(x)") # puedes personalizar las estadísticas)
Warning in attr(x, "align"): 'xfun::attr()' is deprecated.
Use 'xfun::attr2()' instead.
See help("Deprecated")
Variable
Min
Pctile[25]
Pctile[50]
Mean
Sd
Pctile[75]
Max
HDI
0.64
0.79
0.86
0.84
0.073
0.9
0.97
CO2
0.82
4.4
6.2
6.9
3.6
8.8
26
GII
0.009
0.095
0.16
0.17
0.1
0.23
0.54
LFPF
23
47
53
53
9.8
58
86
LFPM
41
63
67
67
6.5
71
88
MFC
3.3
13
17
20
9.3
26
61
MMR
1.1
5.4
8.1
12
11
14
68
SSPF
3.1
14
21
23
11
31
48
SSPM
52
69
79
77
11
86
97
gasto_salud
6
247
851
1590
1687
2718
7871
AccesoElectricidad
88
100
100
100
1.4
100
100
AGUAPOTABLE
73
97
100
98
3.6
100
100
CPI
15
38
53
56
22
75
100
ggplot(africa_data, aes(x =as.factor(year), y = HDI)) +# Violin pastelgeom_violin(fill ="#D0E6F6", color =NA, alpha =0.9, trim =FALSE) +# Boxplot más ancho y con línea más gruesageom_boxplot(width =0.4, # más ancho (0.4 en lugar de 0.2)fill =NA, color ="#5B8DB8", outlier.shape =NA,linewidth =0.8# línea más gruesa ) +# Puntosgeom_jitter(width =0.15, size =1.3, color ="#2C5F8A", alpha =0.6) +# Mediana en rojo pastelstat_summary(fun = median, geom ="point", shape =21, size =2.5,fill ="#F08080", color ="black", stroke =0.6 ) +labs(x ="Año", y ="Índice de Desarrollo Humano" ) +theme_minimal() +theme(axis.text.x =element_text(angle =90, vjust =0.5, hjust =1),plot.title =element_text(face ="bold", size =14),axis.title =element_text(size =12) )
Warning: Removed 55 rows containing non-finite outside the scale range
(`stat_ydensity()`).
Warning: Removed 55 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 55 rows containing non-finite outside the scale range
(`stat_summary()`).
Warning: Removed 55 rows containing missing values or values outside the scale range
(`geom_point()`).
ggplot(europa_data, aes(x =as.factor(year), y = HDI)) +# Violin pastelgeom_violin(fill ="#D0E6F6", color =NA, alpha =0.9, trim =FALSE) +# Boxplot más ancho y con línea más gruesageom_boxplot(width =0.4, # más ancho (0.4 en lugar de 0.2)fill =NA, color ="#5B8DB8", outlier.shape =NA,linewidth =0.8# línea más gruesa ) +# Puntosgeom_jitter(width =0.15, size =1.3, color ="#2C5F8A", alpha =0.6) +# Mediana en rojo pastelstat_summary(fun = median, geom ="point", shape =21, size =2.5,fill ="#F08080", color ="black", stroke =0.6 ) +labs(x ="Año", y ="Índice de Desarrollo Humano" ) +theme_minimal() +theme(axis.text.x =element_text(angle =90, vjust =0.5, hjust =1),plot.title =element_text(face ="bold", size =14),axis.title =element_text(size =12) )
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_summary()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).
ggplot(europa_data, aes(x =as.factor(year), y = HDI)) +# Violin pastelgeom_violin(fill ="#D0E6F6", color =NA, alpha =0.9, trim =FALSE) +# Boxplot más ancho y con línea más gruesageom_boxplot(width =0.4, # más ancho (0.4 en lugar de 0.2)fill =NA, color ="#5B8DB8", outlier.shape =NA,linewidth =0.8# línea más gruesa ) +# Puntosgeom_jitter(width =0.15, size =1.3, color ="#2C5F8A", alpha =0.6) +# Mediana en rojo pastelstat_summary(fun = median, geom ="point", shape =21, size =2.5,fill ="#F08080", color ="black", stroke =0.6 ) +labs(x ="Año", y ="Índice de Desarrollo Humano" ) +theme_minimal() +theme(axis.text.x =element_text(angle =90, vjust =0.5, hjust =1),plot.title =element_text(face ="bold", size =14),axis.title =element_text(size =12) )
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_ydensity()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_summary()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).
hdi_quantiles <- africa_data %>%group_by(year) %>%summarise(Q1 =quantile(HDI, 0.25, na.rm =TRUE),Q2 =quantile(HDI, 0.50, na.rm =TRUE),Q3 =quantile(HDI, 0.75, na.rm =TRUE) ) %>%mutate(date =as.Date(paste0(year, "-01-01")))# Paso 2: convertir a xtshdi_xts <-xts(hdi_quantiles[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles$date)# Paso 3: gráfico interactivo con estilodygraph(hdi_xts, main ="Distribución del HDI en África (cuantiles)") %>%dySeries("Q1", label ="Q1 (25%)", color ="#1b9e77") %>%dySeries("Q2", label ="Mediana", color ="#984ea3") %>%dySeries("Q3", label ="Q3 (75%)", color ="#377eb8") %>%dyOptions(strokeWidth =2.5) %>%dyHighlight(highlightCircleSize =5,highlightSeriesBackgroundAlpha =0.2,highlightSeriesOpts =list(strokeWidth =4) ) %>%dyRangeSelector(height =30, strokeColor ="#666", fillColor ="#d0ebff")
hdi_quantiles1 <- europa_data %>%group_by(year) %>%summarise(Q1 =quantile(HDI, 0.25, na.rm =TRUE),Q2 =quantile(HDI, 0.50, na.rm =TRUE),Q3 =quantile(HDI, 0.75, na.rm =TRUE) ) %>%mutate(date =as.Date(paste0(year, "-01-01")))# Paso 2: convertir a xtshdi_xts <-xts(hdi_quantiles1[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles1$date)# Paso 3: gráfico interactivo con estilodygraph(hdi_xts, main ="Distribución del HDI en Europa (cuantiles)") %>%dySeries("Q1", label ="Q1 (25%)", color ="#1b9e77") %>%dySeries("Q2", label ="Mediana", color ="#984ea3") %>%dySeries("Q3", label ="Q3 (75%)", color ="#377eb8") %>%dyOptions(strokeWidth =2.5) %>%dyHighlight(highlightCircleSize =5,highlightSeriesBackgroundAlpha =0.2,highlightSeriesOpts =list(strokeWidth =4) ) %>%dyRangeSelector(height =30, strokeColor ="#666", fillColor ="#d0ebff")
media_por_continente2 <- IDH %>%group_by(CONTINENTE, year) %>%summarise(media_LEB =mean(LEB, na.rm =TRUE), .groups ="drop")# Convertir a dos arrays separados (uno por continente)europa2<- media_por_continente2 %>%filter(CONTINENTE =="Europa") %>%pull(media_LEB)africa2 <- media_por_continente2 %>%filter(CONTINENTE =="África") %>%pull(media_LEB)graficoLEBE<-ggplot(media_por_continente2 %>%filter(CONTINENTE =="EUROPA"), aes(x = year, y = media_LEB, group = CONTINENTE, color = CONTINENTE)) +geom_line() +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))+labs(title ="Europa", x ="Año", y ="LEB") +theme(legend.position ="none")graficoLEBA <-ggplot(media_por_continente2 %>%filter(CONTINENTE =="AFRICA"), aes(x = year, y = media_LEB, group = CONTINENTE, color = CONTINENTE)) +geom_line() +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))+labs(title ="AFRICA", x ="Año", y ="LEB") +theme(legend.position ="none")
hdi_quantiles1 <- europa_data %>%group_by(year) %>%summarise(Q1 =quantile(LEB, 0.25, na.rm =TRUE),Q2 =quantile(LEB, 0.50, na.rm =TRUE),Q3 =quantile(LEB, 0.75, na.rm =TRUE) ) %>%mutate(date =as.Date(paste0(year, "-01-01")))# Paso 2: convertir a xtshdi_xts <-xts(hdi_quantiles1[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles1$date)# Paso 3: gráfico interactivo con estilodygraph(hdi_xts, main ="Distribución de la esperanza de vida en Europa (cuantiles)") %>%dySeries("Q1", label ="Q1 (25%)", color ="#1b9e77") %>%dySeries("Q2", label ="Mediana", color ="#984ea3") %>%dySeries("Q3", label ="Q3 (75%)", color ="#377eb8") %>%dyOptions(strokeWidth =2.5) %>%dyHighlight(highlightCircleSize =5,highlightSeriesBackgroundAlpha =0.2,highlightSeriesOpts =list(strokeWidth =4) ) %>%dyRangeSelector(height =30, strokeColor ="#666", fillColor ="#d0ebff")
hdi_quantiles1 <- europa_data %>%group_by(year) %>%summarise(Q1 =quantile(GNP, 0.25, na.rm =TRUE),Q2 =quantile(GNP, 0.50, na.rm =TRUE),Q3 =quantile(GNP, 0.75, na.rm =TRUE) ) %>%mutate(date =as.Date(paste0(year, "-01-01")))# Paso 2: convertir a xtshdi_xts <-xts(hdi_quantiles1[, c("Q1", "Q2", "Q3")], order.by = hdi_quantiles1$date)# Paso 3: gráfico interactivo con estilodygraph(hdi_xts, main ="Distribución del ingreso nacional bruto en Europa (cuantiles)") %>%dySeries("Q1", label ="Q1 (25%)", color ="#1b9e77") %>%dySeries("Q2", label ="Mediana", color ="#984ea3") %>%dySeries("Q3", label ="Q3 (75%)", color ="#377eb8") %>%dyOptions(strokeWidth =2.5) %>%dyHighlight(highlightCircleSize =5,highlightSeriesBackgroundAlpha =0.2,highlightSeriesOpts =list(strokeWidth =4) ) %>%dyRangeSelector(height =30, strokeColor ="#666", fillColor ="#d0ebff")
media_por_continente1 <- IDH %>%group_by(CONTINENTE, year) %>%summarise(media_GNP =mean(GNP, na.rm =TRUE), .groups ="drop")# Convertir a dos arrays separados (uno por continente)europa1<- media_por_continente1 %>%filter(CONTINENTE =="Europa") %>%pull(media_GNP)africa1 <- media_por_continente1 %>%filter(CONTINENTE =="África") %>%pull(media_GNP)graficoGNPE <-ggplot(media_por_continente1 %>%filter(CONTINENTE =="EUROPA"), aes(x = year, y = media_GNP, group = CONTINENTE, color = CONTINENTE)) +geom_line() +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))+labs(title ="Europa", x ="Año", y ="GNP") +theme(legend.position ="none")graficoGNPA <-ggplot(media_por_continente1 %>%filter(CONTINENTE =="AFRICA"), aes(x = year, y = media_GNP, group = CONTINENTE, color = CONTINENTE)) +geom_line() +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))+labs(title ="AFRICA", x ="Año", y ="GNP") +theme(legend.position ="none")
The following object is masked from 'package:pls':
corrplot
cor_spearman_europa <-cor(europa_data[, 5:ncol(europa_data)], method ="pearson", use ="pairwise.complete.obs")corrplot(cor_spearman_europa, method ="color", type ="upper", tl.cex =0.7, title ="Correlaciones de Spearman en Europa", addCoef.col ="black", number.cex =0.7, mar =c(0, 0, 2, 0))
cor_spearman <-cor(africa_data[, 5:ncol(africa_data)], method ="spearman", use ="pairwise.complete.obs")corrplot(cor_spearman, method ="color", type ="upper", tl.cex =0.7, title ="Correlaciones de Spearman en África", addCoef.col ="black", number.cex =0.7, mar =c(0, 0, 2, 0))
MFCE<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = MFC, y = HDI, color = year)) +geom_point(size =2) +labs(x ="MFC", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()MFCA<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x = MFC, y = HDI, color = year)) +geom_point(size =2) +labs(x ="MFC", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()MFCE
MFCA
GSE<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +geom_point(size =2) +labs(x ="Gasto en Salud", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()GSA<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +geom_point(size =2) +labs(x ="Gasto en Salud", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()GSE
GSA
### Cambia los gráficos a estoGSEAlirio<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +geom_point(size =2) +labs(x ="Gasto en Salud", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_minimal()GSEAlirio
GSEA<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = gasto_salud, y = HDI, color = year)) +geom_point(size =2) +labs(x ="Gasto en Salud", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()GSEAlirio
GSAL<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x =log(gasto_salud), y = HDI, color = year)) +geom_point(size =2) +geom_smooth(method ="lm", se =FALSE, color ="black")+labs(x ="Gasto en salud", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()GSAL
Call:
lm(formula = HDI ~ log(gasto_salud), data = africa_data)
Residuals:
Min 1Q Median 3Q Max
-0.160518 -0.031738 0.001597 0.036008 0.187612
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.338185 0.004970 68.05 <2e-16 ***
log(gasto_salud) 0.065402 0.001584 41.30 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.05513 on 675 degrees of freedom
Multiple R-squared: 0.7165, Adjusted R-squared: 0.716
F-statistic: 1706 on 1 and 675 DF, p-value: < 2.2e-16
plot(LMAGS)
residuos_dfGSA <-data.frame(residuos_est =rstandard(LMAGS))QQPLOTA<-ggplot(residuos_dfGSA, aes(sample = residuos_est)) +stat_qq(color ="#0072B2", size =2, alpha =0.7) +# puntosstat_qq_line(color ="#D55E00", linewidth =1.2, linetype ="dashed") +# línea de referencialabs(title ="QQ Plot de los residuos estandarizados",x ="Cuantiles teóricos",y ="Cuantiles de los residuos") +theme_minimal(base_size =14) +theme(plot.title =element_text(hjust =0.5, face ="bold"),panel.grid.major =element_line(color ="gray90") )QQPLOTA
residuosGSA <-data.frame(fitted =fitted(LMAGS),resid =resid(LMAGS))ResidualsvsfittedLMAGS <-ggplot(residuosGSA, aes(x = fitted, y = resid)) +geom_point(color ="#0072B2", alpha =0.6, size =2) +# puntosgeom_smooth(method ="loess", se =FALSE, color ="#D55E00", linewidth =1.2) +# curva de tendenciageom_hline(yintercept =0, linetype ="dashed", color ="gray40") +# línea baselabs(title ="Residuals vs Fitted Values",x ="Fitted values",y ="Residuals") +theme_minimal(base_size =14) +theme(plot.title =element_text(hjust =0.5, face ="bold"),panel.grid.major =element_line(color ="gray90") )ResidualsvsfittedLMAGS
`geom_smooth()` using formula = 'y ~ x'
library(lmtest)bptest(LMAGS)
studentized Breusch-Pagan test
data: LMAGS
BP = 0.82498, df = 1, p-value = 0.3637
GSEL<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x =log(gasto_salud), y = HDI, color = year)) +geom_point(size =2) +geom_smooth(method ="lm", se =FALSE, color ="black")+labs(x ="Gasto en salud", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()GSEL
Call:
lm(formula = HDI ~ GII, data = africa_data)
Residuals:
Min 1Q Median 3Q Max
-0.211161 -0.044161 0.000891 0.051845 0.162640
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.98933 0.01852 53.43 <2e-16 ***
GII -0.82495 0.03243 -25.44 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.07398 on 675 degrees of freedom
Multiple R-squared: 0.4895, Adjusted R-squared: 0.4887
F-statistic: 647.2 on 1 and 675 DF, p-value: < 2.2e-16
APA<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x = AGUAPOTABLE, y = HDI, color = year)) +geom_point(size =2) +labs(x ="Agua potable", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()APE<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = AGUAPOTABLE, y = HDI, color = year)) +geom_point(size =2) +labs(x ="Agua potable", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()APA
APE
CPIE<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = CPI, y = HDI, color = year)) +geom_point(size =2) +labs(x ="CPI", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()CPIE
SSPFE<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = SSPF, y = HDI, color = year)) +geom_point(size =2) +labs(x ="SSPF", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()SSPFE
LFPFA<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x = LFPF, y = HDI, color = year)) +geom_point(size =2) +labs(x ="LFPF", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()
AEA<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x = AccesoElectricidad, y = HDI, color = year)) +geom_point(size =2) +labs(x ="Acceso electricidad", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()AEA
CO2A<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x = CO2, y = HDI, color = year)) +geom_point(size =2) +labs(x ="CO2", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()CO2A
CO2E<-ggplot(europa_data%>%mutate(year=as.numeric(year)), aes(x = CO2, y = HDI, color = year)) +geom_point(size =2) +labs(x ="CO2", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()
CO2AL<-ggplot(africa_data%>%mutate(year=as.numeric(year)), aes(x =log(CO2), y = HDI, color = year)) +geom_point(size =2) +geom_smooth(method ="lm", se =FALSE, color ="black")+labs(x ="CO2", y ="IDH", color ="Año") +scale_color_gradient2(low="blue",midpoint =2010,mid ="gray80" , high="red")+theme_bw()CO2AL
# Data frame con residuos y valores ajustadosdf_residuos1 <-data.frame(Ajustados =fitted(modelo_glmEuropa33),Residuos =resid(modelo_glmEuropa33, type ="pearson"))# Gráfico mejoradoggplot(df_residuos1, aes(x = Ajustados, y = Residuos)) +geom_point(alpha =0.7, color ="#0072B2") +geom_hline(yintercept =0, linetype ="dashed", color ="red") +labs(title="Residuos de Pearson vs. Valores Ajustados",x ="Valores ajustados",y ="Residuos de Pearson" ) +theme_minimal(base_size =13)
ggplot(df_residuos1, aes(sample = Residuos)) +stat_qq(alpha =0.7, color ="#009E73") +stat_qq_line(color ="red", linetype ="dotted") +labs(title ="Q-Q Plot de los Residuos de Pearson",x ="Cuantiles teóricos",y ="Cuantiles de los residuos" ) +theme_minimal(base_size =13)
library(qqplotr)
Attaching package: 'qqplotr'
The following objects are masked from 'package:ggplot2':
stat_qq_line, StatQqLine
ggplot(df_residuos1, aes(sample = Residuos)) +stat_qq_band(distribution ="norm", alpha =0.3, fill ="#56B4E9") +# Banda de confianzastat_qq_point(alpha =0.7, color ="#009E73") +stat_qq_line(distribution ="norm", color ="red", linetype ="dotted") +labs(x ="Cuantiles teóricos",y ="Cuantiles de los residuos" ) +theme_minimal(base_size =13)
residuosgam <-data.frame(Residuos =residuals(modelogamEuropa, type ="deviance"))# 3. Crear QQ-plot con ggplot2 + qqplotrggplot(residuosgam , aes(sample = Residuos)) +stat_qq_band(distribution ="norm", alpha =0.3, fill ="#56B4E9") +# Banda de confianzastat_qq_point(alpha =0.7, color ="#009E73") +stat_qq_line(distribution ="norm", color ="red", linetype ="dotted") +labs(title ="QQ-plot de residuos del modelo GAM (Europa)",x ="Cuantiles teóricos",y ="Cuantiles de los residuos" ) +theme_minimal(base_size =13)
residuosgam2 <-data.frame(Ajustados =fitted(modelogamEuropa), # Valores ajustadosResiduos =residuals(modelogamEuropa, type ="pearson") # Residuos de Pearson)# 2. Gráfico con ggplot2ggplot(residuosgam2, aes(x = Ajustados, y = Residuos)) +geom_point(alpha =0.7, color ="#0072B2") +geom_hline(yintercept =0, linetype ="dashed", color ="red") +labs(x ="Valores ajustados",y ="Residuos de Pearson" ) +theme_minimal(base_size =13)
pls_model <-plsr(HDI ~GII +CPI+log(gasto_salud), scale =TRUE, data = europa_data, validation ="CV")# Ver cuántos componentes usarsummary(pls_model)
Data: X dimension: 902 3
Y dimension: 902 1
Fit method: kernelpls
Number of components considered: 3
VALIDATION: RMSEP
Cross-validated using 10 random segments.
(Intercept) 1 comps 2 comps 3 comps
CV 0.07288 0.0248 0.02360 0.02349
adjCV 0.07288 0.0248 0.02359 0.02349
TRAINING: % variance explained
1 comps 2 comps 3 comps
X 87.30 92.44 100.00
HDI 88.43 89.60 89.67
scores_pls <- pls_model$scores[, 1:2] # Usar los primeros dos componentes#scores_pls #pls_model$loading.weights
europa_data$scores_1 <- scores_pls[, 1] # Primer componenteeuropa_data$scores_2 <- scores_pls[, 2] # Primer componentegamgam_pls <-gam(HDI ~s(SSPF, bs ="ps") +s(MMR, bs ="tp") +s(AGUAPOTABLE)++scores_1+ scores_2, data = europa_data, select =TRUE, family =betar(link ="logit"), method ="GCV")gam.check(gamgam_pls)
Method: REML Optimizer: outer newton
full convergence after 21 iterations.
Gradient range [-0.0002958453,0.0003322675]
(score -2209.515 & scale 1).
Hessian positive definite, eigenvalue range [0.1172235,438.8201].
Model rank = 30 / 30
Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.
k' edf k-index p-value
s(SSPF) 9.00 3.46 0.55 <2e-16 ***
s(MMR) 9.00 7.63 0.95 0.065 .
s(AGUAPOTABLE) 9.00 6.56 0.82 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Method: REML Optimizer: outer newton
full convergence after 7 iterations.
Gradient range [-6.124927e-05,0.0005363199]
(score -2195.54 & scale 1).
Hessian positive definite, eigenvalue range [3.540861e-06,444.0262].
Model rank = 20 / 20
Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.
k' edf k-index p-value
s(LFPF) 9.00 2.74 0.91 0.005 **
s(MMR) 9.00 7.26 0.93 0.040 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(gamgam_pls2)
library(fitdistrplus)
Loading required package: MASS
Attaching package: 'MASS'
The following object is masked from 'package:dplyr':
select
Loading required package: survival
# Extraer la variableidh <- europa_data$HDI# Escalar a (0, 1) si no lo está (solo si necesario)idh_scaled <- (idh -min(idh)) / (max(idh) -min(idh))# Ajustar distribución betafit <-fitdist(idh_scaled, "beta")# Curva de densidad beta ajustadax_vals <-seq(0, 1, length.out =500)beta_curve <-dbeta(x_vals,shape1 = fit$estimate["shape1"],shape2 = fit$estimate["shape2"])df_curve <-data.frame(x = x_vals, y = beta_curve)# Graficarggplot(data.frame(idh_scaled), aes(x = idh_scaled)) +geom_histogram(aes(y = ..density..), bins =30,fill ="skyblue", color ="black", alpha =0.7) +geom_line(data = df_curve, aes(x = x, y = y), color ="red", linewidth =1.2) +labs(title ="Ajuste de una distribución beta al IDH",x ="IDH (escalado)", y ="Densidad")
Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.
fit$estimate
shape1 shape2
2.518679 1.466924
library(fitdistrplus)library(goftest)fit_beta <-fitdist(europa_data$HDI, "beta", method ="mle")# Ver resumen del ajustesummary(fit_beta)
Fitting of the distribution ' beta ' by maximum likelihood
Parameters :
estimate Std. Error
shape1 20.384026 0.9729091
shape2 3.753954 0.1696027
Loglikelihood: 1140.685 AIC: -2277.37 BIC: -2267.76
Correlation matrix:
shape1 shape2
shape1 1.0000000 0.9237282
shape2 0.9237282 1.0000000
# Graficar el ajuste (opcional)plot(fit_beta)
# Usar test de Anderson-Darling (más adecuado con ties)ad_test <-ad.test(europa_data$HDI, null =function(x) pbeta(x,shape1 = fit_beta$estimate["shape1"],shape2 = fit_beta$estimate["shape2"]))print(ad_test)
Anderson-Darling test of goodness-of-fit
Null hypothesis: distribution 'function(x) pbeta(x, shape1 =
fit_beta$estimate["shape1"], shape2 = fit_beta$estimate["shape2"])'
Parameters assumed to be fixed
data: europa_data$HDI
An = 4.001, p-value = 0.008714